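/* 
Creates the figures and tables for the analysis of the CUT reform in the incumbent worker sample
	// input: $files/inclevel_dataset.dta
	// output: 
	- figures: $figures/atblemp_femcb_did.tif; $figures/atblemp_het_by_educ_DD.tif
	- tables: $tables/T5_pa.txt (intermediate estout file); $tables/Table6.xlsx (sheets: table6)
	- log: $logs/inclevel_figures_log
*/ 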

* Logging: close any log that is still open, then open this script's log.
* Both commands run under capture, so a failure here does not stop the script.
capture log close
capture log using "$logs/inclevel_figures_log", replace

* Data toggles (0 = no restriction unless stated otherwise)
* singest: 1 = worker's baseline establishment must be a single establishment firm
local singest 0
* bothFM: 1 = worker's baseline establishment must employ both men and women at baseline
local bothFM 1
* covered: 1 = worker's baseline establishment must be in the geographic coverage of the CBA
local covered 1
* signing: 1 = worker's baseline establishment must be a signing establishment
local signing 1
* geoglevel: empty string = no geog-yr FEs; state_mode = state-yr FEs;
*            microregion_mode = microregion-yr FEs
local geoglevel "microregion_mode"
* no2011: 0 = include 2011; 1 = exclude 2011
local no2011 1
* restr: 1 = worker must be 18 or older and not be in probation at baseline
local restr 0

* Load dataset at the incumbent worker level
use "$files/inclevel_dataset.dta", clear

* Sample restrictions: for every toggle switched on, observations whose
* baseline-establishment flag is not equal to 1 (including missing flags) are removed.
if `singest' {
	drop if singestwork!=1
}
if `bothFM' {
	drop if bothFMwork!=1
}
if `covered' {
	drop if coverwork!=1
}
if `signing' {
	drop if signwork!=1
}
if `restr' {
	* NOTE(review): observations with missing blage or missing blten survive
	* both filters below (missing sorts above any number) - confirm this is intended.
	drop if blage<18
	* prob flags baseline tenure of 3 or less; flagged workers are removed
	gen prob=(blten<=3)
	keep if prob!=1
}

* Time-varying fixed effects: each variable ending in FE is a group id built
* from year crossed with a baseline category. The regressions further down
* absorb every variable matching the FE suffix.

// industry: blind scaled down by 1000 and by 100, then floored
// (presumably 2- and 3-digit industry codes - confirm against the codebook)
gen ind2d_num = floor(blind/1000)
gen ind3d_num = floor(blind/100)
egen indyrFE = group(year ind2d_num)

// geography: rename both candidates so the geoglevel toggle can pick one by name
rename (blstate blmr) (state_mode microregion_mode)
egen geogyrFE = group(year `geoglevel')

// tenure: baseline tenure in bins of width 3
gen ten3m = floor(blten/3)
egen tenyrFE = group(year ten3m)

* Clustering variable: copy of blempl
gen clustergrp = blempl

* Difference-in-differences building blocks: post is 1 from 2015 onwards
gen post = cond(year>=2015, 1, 0) if year!=.
gen treatpost = treatwork*post
gen postgender = post*gender
gen treatgender = treatwork*gender

* Year dummies yearj1, yearj2, ... in ascending order of year
tab year, gen(yearj)

* Treatment-by-year interactions, labelled with the calendar year.
* Assumes the sample holds exactly the seven years 2011-2017, so that
* yearj1 is 2011 and yearj7 is 2017 - TODO confirm.
forvalues k = 1/7 {
	local yr = 2010 + `k'
	gen interj`k' = treatwork*yearj`k'
	label variable interj`k' "`yr'"
}

* The 2014 interaction is overwritten with zero for every observation,
* so it carries no variation in the event-study regression.
gen zero = 0
label variable zero "2014"
replace interj4 = 0

* Regression weights: inverse of baseline-establishment employment
gen blfeminv = 1/blfem
gen blmalinv = 1/blmal
* own-gender weight: female inverse when gender==1, male inverse when gender==0,
* missing for any other value of gender
gen blinv = cond(gender==1, blfeminv, cond(gender==0, blmalinv, .))

label variable blfeminv "Inverse of female employment at bl est"
label variable blmalinv "Inverse of male employment at bl est"
label variable blinv "Inverse of own-gender employment at bl est"

* Outcome variables
* indata is 1 when notindata equals 0, and 0 otherwise (left missing when notindata is .)
gen indata = cond(notindata==0, 1, 0) if notindata!=.
label variable lnwage "Log earnings"
label variable atblemp "At baseline employer"
label variable indata "Employed in the formal sector"

* Regression sample: optionally remove 2011 together with its interaction term.
* xl is the position of the dashed vertical line in the event-study figure:
* 3.5 when 2011 is excluded, 4.5 when it is kept.
local xl = cond(`no2011', 3.5, 4.5)
if `no2011' {
	drop if year==2011
	drop interj1
}

* Missing-value report for the three outcomes
mdesc lnwage atblemp indata

**FIGURE: INCUMBENT WOMEN'S RETENTION
* Sample: gender==1 and childbearing==1, weighted by blfeminv, absorbing the
* worker id and every variable ending in FE, clustered on clustergrp.
foreach dv of varlist atblemp {
	* Pooled DiD estimate; coefficient and standard error are formatted to
	* three decimals and printed as text inside the figure
	reghdfe `dv' treatpost if gender==1 & childbearing==1 [aweight=blfeminv], ///
		absorb(fakeid_worker *FE) cluster(clustergrp)
	local didb = _b[treatpost]
	local didb : display %04.3f `didb'
	local didse = _se[treatpost]
	local didse : display %04.3f `didse'

	* Event-study regression on the treatment-by-year interactions
	reghdfe `dv' interj* if gender==1 & childbearing==1 [aweight=blfeminv], ///
		absorb(fakeid_worker *FE) cluster(clustergrp)
	estimates store `dv'

	* Plot the interaction coefficients (omitted ones included), with a dashed
	* vertical line at position xl
	coefplot (`dv', connect(l) lpatt(solid) lcol(sourapple1) ///
			mcol(sourapple1) ciopts(lpatt(solid) lcol(sourapple1%50)) ///
			text(0.03 4 "`didb'" "(`didse')", color(sourapple1) size(medium) placement(se))), ///
		omitted keep( interj* ) ///
		yline(0, lc(sourapple4) lp(solid)) ///
		xline(`xl', lc(sourapple4) lp(dash)) ///
		vertical ytitle("`: var label `dv''", height(5) size(medlarge)) ///
		ylabel(-0.02(0.01)0.03, labsize(medium) gmax gmin) ///
		xlabel(, labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))

	graph export "$figures/`dv'_femcb_did.tif", replace
}


* ------------------------------------------
* Statistical diffs by gender (table)
* ------------------------------------------

* Gender interactions of every treatment-by-year term (prefix x)
foreach ev of varlist interj* {
	gen x`ev' = `ev'*gender
}

* Triple interaction treat x post x gender
gen treatpostgender = treatpost*gender

* Gender-specific year fixed effects by industry, microregion and tenure bin.
* NOTE(review): mrgenyr always uses microregion_mode, whatever the geoglevel
* toggle is set to - confirm this is intended.
egen ind2genyr = group(ind2d_num gender year)
egen mrgenyr = group(microregion_mode gender year)
egen tengenyr = group(ten3m gender year)

**TABLE: DIFFERENTIAL EFFECTS BY GENDER FOR INCUMBENT WORKERS
* One regression per outcome, stored as pa1, pa2, pa3 in the order
* atblemp, lnwage, indata.
local j = 1
foreach depvar of varlist atblemp lnwage indata {

	eststo allworkers: reghdfe `depvar' treatpost treatpostgender [aweight=blinv], ///
		abs(fakeid_worker ind2genyr mrgenyr tengenyr) cluster(clustergrp)

	* extra statistics for the table footer
	estadd scalar obs = e(N_full)
	* unweighted mean of the outcome among treated workers in 2014
	quietly summarize `depvar' if year == 2014 & treatwork == 1
	estadd scalar meandv = r(mean)
	* note: r2sq holds e(r2_a)
	estadd scalar r2sq = e(r2_a)

	estimates store pa`j'
	local j = `j' + 1

}

*** Export ***
* Step 1: write the three stored models (pa1-pa3) to a tab-separated text file.
* No model labels or column labels are written, so the file has no header row.
estout pa1 pa2 pa3 using "$tables/T5_pa.txt", ///
	style(tab) mlabels(none) label collabels(none) ///
	cells(b(star fmt(%9.3f)) se(par)) ///
	stats(meandv obs r2sq, fmt(%9.2fc %9.0fc %9.2fc) labels("mean depvar" "N" "R2")) ///
	drop(o.*, relax) keep(treatpost treatpostgender, relax) replace starlevels(* 0.10 ** 0.05 *** 0.01)

* Step 2: read the text file back, name the columns and save them as an Excel
* sheet. The data in memory are protected with preserve/restore.
preserve
	* varnames(nonames): the file has no header row. Without this option
	* import delimited guesses whether row 1 holds variable names; if it guesses
	* yes, the first coefficient row is lost and the renames of v1-v4 below fail.
	import delimited "$tables/T5_pa.txt", varnames(nonames) clear
	rename v1 coeffs
	rename v2 atblemp
	rename v3 lnwage
	rename v4 indata
	export excel using "$tables/Table6.xlsx", sheet("table6") cell(A1) firstrow(var) sheetreplace
restore

* Retention rates in 2015 among control workers (treatwork==0).
* Order within each group of three: all workers, women (gender==1), men (gender==0).
local ctrl15 "year==2015 & treatwork==0"

* with weights (inverse baseline employment)
summarize atblemp if `ctrl15' [aweight=blinv]
summarize atblemp if `ctrl15' & gender==1 [aweight=blfeminv]
summarize atblemp if `ctrl15' & gender==0 [aweight=blmalinv]

* without weights
summarize atblemp if `ctrl15'
summarize atblemp if `ctrl15' & gender==1
summarize atblemp if `ctrl15' & gender==0


*******************************************
* Retention by education
*******************************************

* quality: education group built from baseline schooling (blschool)
*   0 = blschool below 8
*   1 = blschool from 8 to below 11
*   2 = blschool from 11 to below 13
*   3 = blschool 13 or above
* Each comparison contributes 1 when true, so their sum is the group code.
* The !missing() guard leaves quality missing for system missing (.) AND
* extended missing values (.a-.z); a plain comparison with . would have
* coded extended missing values as group 0.
cap drop quality
gen quality = (blschool>=8) + (blschool>=11) + (blschool>=13) if !missing(blschool)

**FIGURE: HETEROGENEITY IN WOMEN'S RETENTION BY EDUCATION GROUP
* One DiD regression per education group (quality 0 to 3) on women only,
* weighted by blfeminv; results are stored as DDf0 ... DDf3.
foreach q of numlist 0/3 {
	eststo DDf`q': reghdfe atblemp treatpost if gender==1 & quality==`q' [aweight=blfeminv], ///
		absorb(fakeid_worker *FE) cluster(clustergrp)
}

* Plot the four treatpost estimates next to each other in a single plot.
* NOTE(review): xline(11) lies far beyond four plotted estimates - confirm it is intended.
coefplot (DDf0, aseq("ESch") \ DDf1, aseq("MSch") \ DDf2, aseq("HSch") \ ///
		DDf3, aseq("+HSch") mcol(sourapple1) ciopts(lpatt(solid) lcol(sourapple1%50))) ///
	, ///
	vertical keep(treatpost) title("") ///
	yline(0, lc(sourapple4) lp(solid)) ///
	ytitle("DID estimate: Incumbent women's retention", size(medlarge)) ///
	ylabel(-0.01(0.01)0.04, labsize(medium) gmax gmin) ///
	xline(11, lp(dash)) ///
	xtitle("") xlabel(none, labsize(medium)) ///
	plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))

graph export "$figures/atblemp_het_by_educ_DD.tif", replace

* Retention rates in 2015 among control workers (treatwork==0), by education group.
* quality: 0 = Elementary school, 1 = Middle school, 2 = High school,
*          3 = More than high school
* Order within each group: all workers, women (gender==1), men (gender==0).

* with weights (inverse baseline employment)
forvalues q = 0/3 {
	summarize atblemp if quality==`q' & year==2015 & treatwork==0 [aweight=blinv]
	summarize atblemp if quality==`q' & year==2015 & treatwork==0 & gender==1 [aweight=blfeminv]
	summarize atblemp if quality==`q' & year==2015 & treatwork==0 & gender==0 [aweight=blmalinv]
}

* without weights
forvalues q = 0/3 {
	summarize atblemp if quality==`q' & year==2015 & treatwork==0
	summarize atblemp if quality==`q' & year==2015 & treatwork==0 & gender==1
	summarize atblemp if quality==`q' & year==2015 & treatwork==0 & gender==0
}


capture log close
